#include <vector>

#include "caffe/layers/shape_index_patch_layer.hpp"
#include "caffe/util/math_functions.hpp"

// debug
#include <iostream>
#include <typeinfo>

namespace caffe {

/**
 * @brief Reads the layer parameters and derives the patch size on the
 *        feature map.
 *
 * The patch size configured in original-image pixels (origin_patch_h/w) is
 * scaled by the ratio between the feature-map size and the original image
 * size (origin_h/w); adding 0.5 before the conversion to an integer rounds
 * the (positive) result to the nearest value.
 *
 * @param bottom bottom[0] is the feature map; bottom[1] stores the point
 *               coordinates as x1, y1, ..., xn, yn along axis 1.
 * @param top    one output blob per point.
 */
template <typename Dtype>
void ShapeIndexPatchLayer<Dtype>::LayerSetUp(
        const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
    origin_patch_h_ = this->layer_param_.shape_index_patch_param().origin_patch_h();
    origin_patch_w_ = this->layer_param_.shape_index_patch_param().origin_patch_w();
    origin_h_ = this->layer_param_.shape_index_patch_param().origin_h();
    origin_w_ = this->layer_param_.shape_index_patch_param().origin_w();

    // The original image size is the divisor of the scale computed below, so
    // a zero (or negative) value must be rejected up front.
    CHECK_GT(origin_h_, 0) << "origin_h must be positive";
    CHECK_GT(origin_w_, 0) << "origin_w must be positive";

    // Exactly two coordinates (x, y) per top blob. Comparing against
    // 2 * #tops instead of dividing by two keeps an odd coordinate count
    // from slipping through the integer division.
    CHECK_EQ(bottom[1]->shape(1), 2 * static_cast<int>(top.size()))
        << "Point num error!";

    feat_h_ = bottom[0]->height();
    feat_w_ = bottom[0]->width();
    feat_patch_h_ = origin_patch_h_ * feat_h_ / float(origin_h_) + 0.5;
    feat_patch_w_ = origin_patch_w_ * feat_w_ / float(origin_w_) + 0.5;
}

/**
 * @brief Sizes every top blob to (num, channels, patch_h, patch_w).
 *
 * The cached feature-map size and the patch size derived from it are
 * refreshed here as well, so that they follow bottom[0] if it is reshaped
 * after set-up; Forward/Backward use these cached values for their bounds
 * checks.
 *
 * @param bottom bottom[0] is the feature map, bottom[1] the point coordinates.
 * @param top    one output blob per point, all given the same shape.
 */
template <typename Dtype>
void ShapeIndexPatchLayer<Dtype>::Reshape(
        const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
    // Forward/Backward read one row of coordinates for every sample of the
    // feature map; fewer rows would be an out-of-bounds read.
    CHECK_GE(bottom[1]->shape(0), bottom[0]->num())
        << "Point blob has fewer samples than the feature map";

    feat_h_ = bottom[0]->height();
    feat_w_ = bottom[0]->width();
    feat_patch_h_ = origin_patch_h_ * feat_h_ / float(origin_h_) + 0.5;
    feat_patch_w_ = origin_patch_w_ * feat_w_ / float(origin_w_) + 0.5;

    const int num_tops = static_cast<int>(top.size());
    for (int i = 0; i < num_tops; ++i) {
        top[i]->Reshape(bottom[0]->num(), bottom[0]->channels(),
                        feat_patch_h_, feat_patch_w_);
    }
}

/**
 * @brief Copies one fixed-size patch of the feature map per point into the
 *        corresponding top blob.
 *
 * For sample n and point i the (x, y) pair stored at bottom[1] offsets
 * 2*i and 2*i + 1 is scaled by (feat_w_ - 1, feat_h_ - 1) and shifted by half
 * the patch size to obtain the top-left pixel of the patch. Patch pixels that
 * fall outside the feature map are written as zero.
 *
 * @param bottom bottom[0] is the feature map; bottom[1] stores the point
 *               coordinates as x1, y1, ..., xn, yn (presumably normalized to
 *               [0, 1] -- confirm against the producer of this blob).
 * @param top    top[i] receives the patches of point i for every sample.
 */
template <typename Dtype>
void ShapeIndexPatchLayer<Dtype>::Forward_cpu(
        const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {

    const Dtype* feat_data = bottom[0]->cpu_data();
    const Dtype* pos_data = bottom[1]->cpu_data();

    const int num = bottom[0]->num();
    const int channels = bottom[0]->channels();
    const int num_points = static_cast<int>(top.size());
    const float r_h = (feat_patch_h_ - 1) / 2.0;
    const float r_w = (feat_patch_w_ - 1) / 2.0;

    // Element strides of the feature map (im), the positions (pos) and the
    // patches (p). All tops share one shape, so top[0] is representative.
    const int os1_im = bottom[0]->count(1);
    const int os2_im = bottom[0]->count(2);
    const int os3_im = bottom[0]->count(3);
    const int os_pos = bottom[1]->count(1);
    const int os1_p = top[0]->count(1);
    const int os2_p = top[0]->count(2);
    const int os3_p = top[0]->count(3);

    for (int i = 0; i < num_points; ++i) {
        // The output pointer only depends on the point index, so it is
        // fetched once per top instead of once per sample.
        Dtype* top_data_i = top[i]->mutable_cpu_data();

        for (int n = 0; n < num; ++n) {
            // x1, y1, ..., xn, yn
            const Dtype* pos_n = pos_data + n*os_pos + 2*i;
            // Coordinate of the first patch pixel, scaled to the feature map.
            // NOTE(review): the conversion to int truncates toward zero, so
            // for a negative value the +0.5 does not round to nearest. Kept
            // unchanged because Backward_cpu must use the very same origin.
            const int y = pos_n[1] * (feat_h_-1) - r_h + 0.5;
            const int x = pos_n[0] * (feat_w_-1) - r_w + 0.5;

            for (int c = 0; c < channels; ++c) {
                const Dtype* feat_plane = feat_data + n*os1_im + c*os2_im;
                Dtype* patch_plane = top_data_i + n*os1_p + c*os2_p;

                for (int ph = 0; ph < feat_patch_h_; ++ph) {
                    const int y_p = y + ph;
                    const bool row_inside = (y_p >= 0 && y_p < feat_h_);
                    Dtype* patch_row = patch_plane + ph*os3_p;

                    for (int pw = 0; pw < feat_patch_w_; ++pw) {
                        const int x_p = x + pw;
                        // Zero-fill everything outside the feature map.
                        if (row_inside && x_p >= 0 && x_p < feat_w_) {
                            patch_row[pw] = feat_plane[y_p*os3_im + x_p];
                        } else {
                            patch_row[pw] = 0;
                        }
                    }
                }
            }
        }
    }
}

/**
 * @brief Scatters the patch gradients back onto the feature map.
 *
 * Every top diff element is added to the feature-map location it was copied
 * from in Forward_cpu; patch pixels outside the feature map contribute
 * nothing. Patches may overlap, hence the accumulation. The point
 * coordinates receive a zero gradient.
 *
 * @param top            top[i] carries the gradient of the patches of point i.
 * @param propagate_down propagate_down[0] / [1] tell whether the diff of the
 *                       feature map / the point coordinates is requested.
 * @param bottom         bottom[0] is the feature map, bottom[1] the point
 *                       coordinates (x1, y1, ..., xn, yn).
 */
template <typename Dtype>
void ShapeIndexPatchLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

    // Bottom diffs are only touched when they are actually requested, which
    // avoids fetching and clearing diff buffers nobody reads.
    if (propagate_down[1]) {
        // No gradient is computed for the positions; the diff is cleared so
        // that it does not hold stale values.
        caffe_set(bottom[1]->count(), Dtype(0), bottom[1]->mutable_cpu_diff());
    }
    if (!propagate_down[0]) {
        return;
    }

    // The loops below accumulate with +=, so the diff has to start from zero.
    Dtype* feat_diff = bottom[0]->mutable_cpu_diff();
    caffe_set(bottom[0]->count(), Dtype(0), feat_diff);

    const Dtype* pos_data = bottom[1]->cpu_data();

    const int num = bottom[0]->num();
    const int channels = bottom[0]->channels();
    const int num_points = static_cast<int>(top.size());
    const float r_h = (feat_patch_h_ - 1) / 2.0;
    const float r_w = (feat_patch_w_ - 1) / 2.0;

    // Element strides of the feature map (im), the positions (pos) and the
    // patches (p). All tops share one shape, so top[0] is representative.
    const int os1_im = bottom[0]->count(1);
    const int os2_im = bottom[0]->count(2);
    const int os3_im = bottom[0]->count(3);
    const int os_pos = bottom[1]->count(1);
    const int os1_p = top[0]->count(1);
    const int os2_p = top[0]->count(2);
    const int os3_p = top[0]->count(3);

    // Points are visited in ascending order for every feature-map element,
    // which keeps the floating-point accumulation order stable.
    for (int i = 0; i < num_points; ++i) {
        const Dtype* top_diff_i = top[i]->cpu_diff();

        for (int n = 0; n < num; ++n) {
            // x1, y1, ..., xn, yn
            const Dtype* pos_n = pos_data + n*os_pos + 2*i;
            // Coordinate of the first patch pixel, scaled to the feature map.
            // Must match the computation in Forward_cpu exactly.
            const int y = pos_n[1] * (feat_h_-1) - r_h + 0.5;
            const int x = pos_n[0] * (feat_w_-1) - r_w + 0.5;

            for (int c = 0; c < channels; ++c) {
                Dtype* feat_plane = feat_diff + n*os1_im + c*os2_im;
                const Dtype* patch_plane = top_diff_i + n*os1_p + c*os2_p;

                for (int ph = 0; ph < feat_patch_h_; ++ph) {
                    const int y_p = y + ph;
                    if (y_p < 0 || y_p >= feat_h_) {
                        continue;  // whole patch row lies outside the feature map
                    }
                    const Dtype* patch_row = patch_plane + ph*os3_p;

                    for (int pw = 0; pw < feat_patch_w_; ++pw) {
                        const int x_p = x + pw;
                        if (x_p >= 0 && x_p < feat_w_) {
                            feat_plane[y_p*os3_im + x_p] += patch_row[pw];
                        }
                    }
                }
            }
        }
    }
}

// NOTE(review): the GPU stub is commented out, so a CPU_ONLY build gets no
// stub from this file -- presumably the layer declares no GPU methods;
// confirm against the header before re-enabling it.
#ifdef CPU_ONLY
//STUB_GPU(ShapeIndexPatchLayer);
#endif

// Caffe macros: presumably instantiate the layer template and register it
// under the type name "ShapeIndexPatch" -- confirm against the macro definitions.
INSTANTIATE_CLASS(ShapeIndexPatchLayer);
REGISTER_LAYER_CLASS(ShapeIndexPatch);

}  // namespace caffe
